Diet (dis)-similarity

Author

Max Lindmark

Published

2024-03-22

Load packages

home <- here::here()

# Load libraries
library(tidyverse)
library(tidylog)
library(RCurl)
library(gllvm)
library(RColorBrewer)
library(ggrepel)
library(vegan)
library(patchwork)
library(RCurl)
library(devtools)
library(ggpubr)
library(janitor)
library(brms)
library(modelr)
library(tidybayes)
library(ggstats)
library(broom)
library(broom.mixed)

# Source map-plot
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")
#source(paste0(home, "R/functions/map-plot.R"))

# Source code for rotating ordination plot (to make it work in ggplot)
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/rotate.R")
#source(paste0(home, "R/functions/rotate.R"))

Read data

d <- read_csv(paste0(home, "/data/clean/aggregated_stomach_data.csv"))

Diet similarity using bar plots and ordination

Ordination using gllvm

d2 <- d %>%
  dplyr::select(ends_with("_tot"))

d2$pred_id <- d$pred_id

d2 <- d2 %>%
  left_join(d %>% dplyr::select(pred_length_cm, pred_weight_g, pred_id, species), by = "pred_id")
left_join: added 3 columns (pred_length_cm, pred_weight_g, species)
           > rows only in x       0
           > rows only in y  (    0)
           > matched rows     9,649
           >                 =======
           > rows total       9,649
# Calculate relative prey weight and average by size-class
d3 <- d2 %>% 
  pivot_longer(ends_with("_tot")) %>% 
  mutate(value = value/pred_weight_g) %>% 
  filter(pred_length_cm >= 10 & pred_length_cm <= 50) %>% # These are the sizes we work with
  #filter(value < quantile(value, probs = 0.995)) %>% 
  # mutate(value = ifelse(value > quantile(value, probs = 0.975),
  #                       quantile(value, probs = 0.975),
  #                       value)) %>%
  mutate(pred_length_cm = cut_width(pred_length_cm, 2),
         pred_length_cm = str_remove(pred_length_cm, "[(]")) %>% 
  separate_wider_delim(pred_length_cm, delim = ",", names = c("pred_length_cm", "scrap")) %>% 
  mutate(pred_length_cm = ifelse(pred_length_cm == "[9", "9", pred_length_cm),
         pred_length_cm = as.numeric(pred_length_cm)) %>% 
  dplyr::select(-scrap) %>% 
  group_by(species, pred_length_cm, name) %>% 
  summarise(tot_prey_weight = mean(value)) %>%
  ungroup() %>% 
  mutate(name = str_replace(name, "_", " "),
         name = str_replace(name, "_", " "),
         name = str_remove(name, " tot"),
         name = str_to_title(name))
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 7,065 rows (5%), 137,670 rows remaining
summarise: now 585 rows and 4 columns, 2 group variables remaining (species, pred_length_cm)
ungroup: no grouping variables
d3 %>% filter(tot_prey_weight > 0) %>% arrange(tot_prey_weight)
filter: removed 174 rows (30%), 411 rows remaining
# A tibble: 411 × 4
   species  pred_length_cm name            tot_prey_weight
   <chr>             <dbl> <chr>                     <dbl>
 1 Cod                  43 Polychaeta         0.0000000895
 2 Cod                  35 Non Bio            0.000000214 
 3 Cod                  47 Amphipoda          0.000000283 
 4 Cod                  45 Polychaeta         0.000000292 
 5 Cod                  41 Bivalvia           0.000000365 
 6 Flounder             33 Mysidae            0.000000366 
 7 Cod                  43 Amphipoda          0.000000406 
 8 Cod                  45 Bivalvia           0.000000930 
 9 Flounder             25 Clupea Harengus    0.00000102  
10 Cod                  41 Amphipoda          0.00000104  
# ℹ 401 more rows
d_wide <- d3 %>% pivot_wider(names_from = name, values_from = tot_prey_weight)
pivot_wider: reorganized (name, tot_prey_weight) into (Amphipoda, Bivalvia, Clupea Harengus, Clupeidae, Gadus Morhua, …) [was 585x4, now 39x17]
d_mat <- d_wide %>%
  dplyr::select(-species, -pred_length_cm) %>%
  as.matrix()

nrow(d_mat)
[1] 39
# Fit gllvm model with two latend variables and no covariates, meaning it becomes an ordination
set.seed(999)

fit <- gllvm(y = d_mat, family = "tweedie", num.lv = 2)
Note that, the tweedie family is implemented using the extended variational approximation method. 
fit
Call: 
gllvm(y = d_mat, family = "tweedie", num.lv = 2)
family: 
[1] "tweedie"
method: 
[1] "VA"

log-likelihood:  2249.647 
Residual degrees of freedom:  526 
AIC:  -4381.294 
AICc:  -4367.808 
BIC:  -4123.369 
# Let's do a ggplot-residual instead
res <- residuals(fit)
p <- NCOL(fit$y)
sppind <- 1:p
ds_res <- res$residuals[, sppind]

# Compare with built-in plot from gllvm: plot(fit, which = 2)
ds_res %>% 
  as.data.frame() %>% 
  pivot_longer(cols = everything()) %>% 
  ggplot(aes(sample = value)) + 
  geom_qq(size = 0.8) + 
  geom_qq_line() + 
  labs(y = "Sample quantiles",
       x = "Theoretical quantiles")
pivot_longer: reorganized (Amphipoda, Bivalvia, Clupea Harengus, Clupeidae, Gadus Morhua, …) into (name, value) [was 39x15, now 585x2]

ggsave(paste0(home, "/figures/supp/qq_ordi_gllvm.pdf"), width = 11, height = 11, units = "cm")

# Plot with GGplot
# Now make the ordination plot. We want to use ggplot, so need to hack it a bit
# See this thread: https://github.com/JenniNiku/gllvm/discussions/116
# ordiplot(fit, biplot = TRUE)

# Extract site scores
LVs = as.data.frame(getLV(fit))

# Bind LV site scores to metadata
# LVs = cbind(LVs, sim.meta)
# 
# ordiplot(fittw, biplot = TRUE)
# 
# ordiplot(fittw, biplot = TRUE, symbols = TRUE, spp.colors = "white")
# 
LVs2 <- rotate(fit)

# text?
labs <- LVs2$species %>%
  as.data.frame() %>% 
  rownames_to_column(var = "prey")

dd <- LVs2$sites %>%
  as.data.frame() %>% 
  bind_cols(d_wide %>% dplyr::select(species, pred_length_cm)) %>% 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <=  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group))

# Alternative palettes for the groups
pal <- (brewer.pal(n = 11, name = "RdYlBu")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 11, name = "RdYlGn")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 8, name = "Dark2")[c(8, 7, 6)])
#pal <- brewer.pal(n = 8, name = "Dark2")[c(2, 7, 6)]

ordi <- ggplot(dd, aes(x = V1, y = V2, color = factor(group, levels = c("Flounder", "Small cod", "Large cod")),
                       size = pred_length_cm)) +
  geom_point(alpha = 0.6) + 
  # stat_ellipse(aes(V1, V2, color = group), 
  #              inherit.aes = FALSE, linewidth = 1, alpha = 0.3) + 
  scale_radius(range = c(0.8, 4)) +
  scale_color_manual(values = pal) +
  scale_fill_manual(values = pal) + 
  labs(size = "Predator length [cm]", shape = "Species", fill = "Group", color = "",
       x = "Latent variable 1", y = "Latent variable 2") +
  geom_label_repel(data = labs, aes(V1, V2, label = prey),
                   color = "grey20", inherit.aes = FALSE, size = 2, alpha = 0.4,
                   box.padding = 0.25) +
  theme_sleek() +
  coord_cartesian(xlim = c(-3, 4), ylim = c(-3, 3)) + 
  guides(color = guide_legend(ncol = 4),
         size = guide_legend(ncol = 4)) + 
  theme(legend.position = "bottom",
        legend.box = "vertical",
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

ordi

Make a plot of the ontogenetic development of diets

# Calculate relative prey weight and average by size-class
d3 <- d2 %>% 
  pivot_longer(ends_with("_tot")) %>% 
  mutate(value = value/pred_weight_g) %>% 
  group_by(species, pred_length_cm, name) %>% 
  summarise(tot_prey_weight = mean(value)) %>%
  ungroup() %>% 
  mutate(name = str_replace(name, "_", " "),
         name = str_replace(name, "_", " "),
         name = str_remove(name, " tot"),
         name = str_to_title(name)) %>% 
  filter(tot_prey_weight < quantile(tot_prey_weight, probs = 0.995))
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
summarise: now 1,545 rows and 4 columns, 2 group variables remaining (species, pred_length_cm)
ungroup: no grouping variables
filter: removed 8 rows (1%), 1,537 rows remaining
max_size_cod <- 65

cod_important_prey <- d3 %>%
  filter(species == "Cod") %>% 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod -1, pred_length_cm)) %>% 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>% 
  group_by(name, predator_length_grp) %>%
  summarise(prey_group_tot = sum(tot_prey_weight)) %>% 
  ungroup() %>% 
  group_by(predator_length_grp) %>% 
  mutate(prop = prey_group_tot / sum(prey_group_tot)) %>% 
  ungroup() %>%
  mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
         max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
         max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size),
         predator = "Cod")
filter: removed 509 rows (33%), 1,028 rows remaining
summarise: now 195 rows and 3 columns, one group variable remaining (name)
ungroup: no grouping variables
ungroup: no grouping variables
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
max_size_fle <- 40

fle_important_prey <- d3 %>%
  filter(species == "Flounder") %>% 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle-1, pred_length_cm)) %>% 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>% 
  group_by(name, predator_length_grp) %>%
  summarise(prey_group_tot = sum(tot_prey_weight)) %>% 
  ungroup() %>% 
  group_by(predator_length_grp) %>% 
  mutate(prop = prey_group_tot / sum(prey_group_tot)) %>% 
  ungroup() %>%
  mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
         max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
         max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size),
         predator = "Flounder")
filter: removed 1,028 rows (67%), 509 rows remaining
summarise: now 105 rows and 3 columns, one group variable remaining (name)
ungroup: no grouping variables
ungroup: no grouping variables
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
area_plot <- bind_rows(fle_important_prey, cod_important_prey) %>% 
  mutate(prop = replace_na(prop, 0))

n_cat <- nrow(area_plot %>% distinct(name))
distinct: removed 285 rows (95%), 15 rows remaining
colourCount <- n_cat
getPalette <- colorRampPalette(brewer.pal(12, "Paired"))
pal2 <- getPalette(colourCount)

area_plot %>% distinct(name)
distinct: removed 285 rows (95%), 15 rows remaining
# A tibble: 15 × 1
   name              
   <chr>             
 1 Amphipoda         
 2 Bivalvia          
 3 Clupea Harengus   
 4 Clupeidae         
 5 Gadus Morhua      
 6 Gobiidae          
 7 Mysidae           
 8 Non Bio           
 9 Other             
10 Other Crustacea   
11 Other Pisces      
12 Platichthys Flesus
13 Polychaeta        
14 Saduria Entomon   
15 Sprattus Sprattus 
# Dataframes for geom_text with sample size
n_cod <- d2 %>%
  filter(species == "Cod") %>% 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod -1, pred_length_cm)) %>% 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>% 
  group_by(predator_length_grp) %>%
  summarise(n = length(unique(pred_id)))
filter: removed 3,882 rows (40%), 5,767 rows remaining
summarise: now 13 rows and 2 columns, ungrouped
n_fle <- d2 %>%
  filter(species == "Flounder") %>% 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle-1, pred_length_cm)) %>% 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>% 
  group_by(predator_length_grp) %>%
  summarise(n = length(unique(pred_id)))
filter: removed 5,767 rows (60%), 3,882 rows remaining
summarise: now 7 rows and 2 columns, ungrouped
n_dat <- bind_rows(n_cod %>% mutate(predator = "Cod"), 
                   n_fle %>% mutate(predator = "Flounder")) %>% 
  mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
         max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
         max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
bar_diet <- 
  ggplot(data = area_plot, aes(x = max_size, y = prop, fill = name, color = name)) +
  geom_col(width = 4.3) +
  geom_text(data = n_dat, aes(x = max_size, y = 1.08, label = n), inherit.aes = FALSE,
            size = 0, color = "white") +
  geom_text(data = n_dat, aes(x = max_size, y = 1.04, label = n), inherit.aes = FALSE,
            size = 2) +
  facet_wrap(~predator, scales = "free") +
  scale_fill_manual(values = pal2, name = "") +
  scale_color_manual(values = pal2, name = "") +
  coord_cartesian(expand = 0) +
  scale_x_continuous(breaks = seq(0, 100, 5)) +
  labs(y = "Proportion", x = "Max. predator size in group [cm]") +
  theme(legend.key.size = unit(0.2, 'cm'),
        legend.position = "bottom",
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

Combine plots

bar_diet / ordi + plot_annotation(tag_levels = "A") #+ plot_layout(heights = c(1, 1.6))

ggsave(paste0(home, "/figures/ordi_diet.pdf"), width = 17, height = 21, units = "cm")

Schoener’s overlap index

# Calculate relative prey weight and average by size-class
year_rect_sum <- d2 %>% 
  pivot_longer(ends_with("_tot")) %>% 
  filter(pred_length_cm > 10 & pred_length_cm < 50) %>% # These are the sizes we work with
  left_join(d %>% dplyr::select(year, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") %>% 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <=  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group)) %>% 
  group_by(year, ices_rect, group) %>%
  summarise(n = length(unique(pred_id)),
            tot_prey = sum(value)) %>% 
  ungroup() %>% 
  group_by(year, ices_rect) %>% 
  mutate(min_stom = min(n)) %>% 
  ungroup() %>%
  mutate(id = paste(year, ices_rect, group, sep = "_")) %>% 
  dplyr::select(-year, -ices_rect, -group) %>% 
  filter(n > 3)
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 8,475 rows (6%), 136,260 rows remaining
left_join: added 5 columns (year, subdiv, ices_rect, X, Y)
           > rows only in x         0
           > rows only in y  (    565)
           > matched rows     136,260
           >                 =========
           > rows total       136,260
summarise: now 280 rows and 5 columns, 2 group variables remaining (year, ices_rect)
ungroup: no grouping variables
ungroup: no grouping variables
filter: removed 34 rows (12%), 246 rows remaining
# year_rect_sum %>% arrange(n) %>% as.data.frame()
# year_rect_sum %>% arrange(min_stom) %>% as.data.frame()
# summary(year_rect_sum$n)

diet_prop <- d2 %>% 
  pivot_longer(ends_with("_tot")) %>% 
  filter(pred_length_cm > 10 & pred_length_cm < 50) %>% # These are the sizes we work with
  left_join(d %>% dplyr::select(year, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") %>% 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <=  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group)) %>% 
  group_by(year, ices_rect, group, name) %>%
  summarise(tot_prey_by_grp = sum(value)) %>% 
  ungroup() %>%
  mutate(id = paste(year, ices_rect, group, sep = "_")) %>% 
  filter(id %in% unique(year_rect_sum$id)) %>% 
  left_join(year_rect_sum, by = "id") %>% 
  mutate(prop_prey = tot_prey_by_grp / tot_prey)
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 8,475 rows (6%), 136,260 rows remaining
left_join: added 5 columns (year, subdiv, ices_rect, X, Y)
           > rows only in x         0
           > rows only in y  (    565)
           > matched rows     136,260
           >                 =========
           > rows total       136,260
summarise: now 4,200 rows and 5 columns, 3 group variables remaining (year, ices_rect, group)
ungroup: no grouping variables
filter: removed 510 rows (12%), 3,690 rows remaining
left_join: added 3 columns (n, tot_prey, min_stom)
           > rows only in x       0
           > rows only in y  (    0)
           > matched rows     3,690
           >                 =======
           > rows total       3,690
# Calculate overlap by year and ices rect and species group. Make the proportions wide!
#unique(diet_prop$group)
diet_prop_wide <- diet_prop %>% 
  dplyr::select(-tot_prey_by_grp, -tot_prey, -id, -n) %>% 
  pivot_wider(names_from = group, values_from = prop_prey) %>% 
  mutate(abs_f_sc = abs(Flounder - `Small cod`),
         abs_f_lc = abs(Flounder - `Large cod`),
         abs_lc_sc = abs(`Large cod` - `Small cod`)) %>% 
  group_by(year, ices_rect) %>% 
  summarise(`Flounder\nSmall cod` = 1 - 0.5*sum(abs_f_sc),
            `Flounder\nLarge cod` = 1 - 0.5*sum(abs_f_lc),
            `Small cod\nLarge cod` = 1 - 0.5*sum(abs_lc_sc))
pivot_wider: reorganized (group, prop_prey) into (Flounder, Large cod, Small cod) [was 3690x6, now 1500x7]
summarise: now 100 rows and 5 columns, one group variable remaining (year)
diet_prop_wide$latitude <- mapplots::ices.rect(diet_prop_wide$ices_rect)$lat
diet_prop_wide$longitude <- mapplots::ices.rect(diet_prop_wide$ices_rect)$lon

diet_prop_wide <- sdmTMB::add_utm_columns(diet_prop_wide)
Warning: Multiple UTM zones detected.
Proceeding with the most common value.
You may wish to choose a different projection.
Proceeding with UTM zone 33N; CRS = 32633.
Visit https://epsg.io/32633 to verify.
ovr <- diet_prop_wide %>%
  pivot_longer(c(`Flounder\nSmall cod`, `Flounder\nLarge cod`, `Small cod\nLarge cod`),
               names_to = "overlap_group", values_to = "overlap") %>% 
  drop_na(overlap)
pivot_longer: reorganized (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) into (overlap_group, overlap) [was 100x9, now 300x8]
drop_na (grouped): removed 94 rows (31%), 206 rows remaining
ovr
# A tibble: 206 × 8
# Groups:   year [8]
    year ices_rect latitude longitude     X     Y overlap_group          overlap
   <dbl> <chr>        <dbl>     <dbl> <dbl> <dbl> <chr>                    <dbl>
 1  2015 40G4          55.8      14.5  469. 6178. "Flounder\nSmall cod"  0.0721 
 2  2015 40G4          55.8      14.5  469. 6178. "Flounder\nLarge cod"  0.0210 
 3  2015 40G4          55.8      14.5  469. 6178. "Small cod\nLarge cod" 0.0346 
 4  2015 40G6          55.8      16.5  594. 6179. "Flounder\nSmall cod"  0.00160
 5  2015 40G6          55.8      16.5  594. 6179. "Flounder\nLarge cod"  0.00240
 6  2015 40G6          55.8      16.5  594. 6179. "Small cod\nLarge cod" 0.347  
 7  2015 41G7          56.2      17.5  655. 6237. "Flounder\nSmall cod"  0.230  
 8  2015 41G7          56.2      17.5  655. 6237. "Flounder\nLarge cod"  0.0948 
 9  2015 41G7          56.2      17.5  655. 6237. "Small cod\nLarge cod" 0.222  
10  2015 41G8          56.2      18.5  717. 6239. "Flounder\nSmall cod"  0.105  
# ℹ 196 more rows
# Plot diet overlap
# plot_map +
#   geom_sf(color = "gray80") + 
#   geom_point(data = ovr, aes(X*1000, Y*1000, color = overlap),
#              size = 10) + 
#   viridis::scale_color_viridis() +
#   geom_sf() + 
#   facet_wrap(~overlap_group, ncol = 2) + 
#   NULL
  
set.seed(99)
ps <- ggplot(ovr, aes(overlap_group, overlap, color = ices_rect)) + 
  geom_jitter(height = 0, width = 0.1, alpha = 0.3) + 
  geom_boxplot(aes(overlap_group, overlap),
               inherit.aes = FALSE, fill = NA, width = 0.2, alpha = 0.2, size = 0.6) + 
  viridis::scale_color_viridis(discrete = TRUE, name = "ICES\nrectangle") + 
  geom_hline(yintercept = 0.6, linetype = 2, alpha = 0.6) + 
  labs(y = "Schoener's overlap index",
       x = "") + 
  theme(legend.position = "bottom",
        legend.key.size = unit(0.01, "cm"),
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

ps

# summary(ovr$overlap)
# 
ovr %>% filter(overlap == 0)
filter (grouped): removed 202 rows (98%), 4 rows remaining
# A tibble: 4 × 8
# Groups:   year [3]
   year ices_rect latitude longitude     X     Y overlap_group         overlap
  <dbl> <chr>        <dbl>     <dbl> <dbl> <dbl> <chr>                   <dbl>
1  2018 42G6          56.8      16.5  592. 6291. "Flounder\nLarge cod"       0
2  2021 43G6          57.2      16.5  591. 6346. "Flounder\nSmall cod"       0
3  2021 43G6          57.2      16.5  591. 6346. "Flounder\nLarge cod"       0
4  2022 42G6          56.8      16.5  592. 6291. "Flounder\nLarge cod"       0

How does the diet overlap relate to biomass density of the species?

Trawl data

## Read in trawl data. There's one file for cod and one for flounder
# They go up to 2020 Q1
# trawl_surveys_cod <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv", sep = ";")
# trawl_surveys_fle <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv", sep = ";")

trawl_surveys_cod <- read_delim(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv"), delim = ";")
Rows: 9199 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr  (10): Species, Index, Appr. status, Validity, Station name, ICES, Dist....
dbl  (13): Year, Quarter, Month, Seqno, Haul, Subsam, Processing, Size, Subd...
num   (7): Lat, Long, Bottom depth, Doorspread, Opening, Tot. no./hour, No./...
lgl   (4): Sex, Headropedepth, DNA sampleID, NumberToSample
date  (1): Date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trawl_surveys_fle <- read_delim(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv"), delim = ";")
Rows: 7306 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr  (10): Species, Index, Appr. status, Validity, Station name, ICES, Dist....
dbl  (13): Year, Quarter, Month, Seqno, Haul, Subsam, Processing, Size, Subd...
num   (7): Lat, Long, Bottom depth, Doorspread, Opening, Tot. no./hour, No./...
lgl   (4): Sex, Headropedepth, DNA sampleID, NumberToSample
date  (1): Date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Combine for the two species, filter and clean!
trawl_data <- rbind(trawl_surveys_cod, trawl_surveys_fle) %>%
  clean_names() %>%
  mutate(swept_area = as.numeric(gsub(",", "\\.", swept_area)),
         kg_hour = as.numeric(gsub(",", "\\.", kg_hour)),
         dist = as.numeric(gsub(",", "\\.", dist))) %>% 
  as.data.frame()

str(trawl_data)
'data.frame':   16505 obs. of  35 variables:
 $ species            : chr  "cod" "cod" "cod" "cod" ...
 $ date               : Date, format: "2015-02-26" "2015-02-26" ...
 $ year               : num  2015 2015 2015 2015 2015 ...
 $ quarter            : num  1 1 1 1 1 1 1 1 1 1 ...
 $ month              : num  2 2 2 2 2 2 2 2 2 2 ...
 $ index              : chr  "OXBH.2015.2" "OXBH.2015.2" "OXBH.2015.2" "OXBH.2015.2" ...
 $ appr_status        : chr  "Locked" "Locked" "Locked" "Locked" ...
 $ seqno              : num  10048 10048 10048 10048 10048 ...
 $ haul               : num  2 2 2 2 2 2 2 2 2 2 ...
 $ validity           : chr  "V" "V" "V" "V" ...
 $ station_name       : chr  "SLAGGENABBEN" "SLAGGENABBEN" "SLAGGENABBEN" "SLAGGENABBEN" ...
 $ subsam             : num  0 0 0 0 0 0 0 0 0 0 ...
 $ processing         : num  0 0 0 0 0 0 0 0 0 0 ...
 $ size               : num  9 9 9 9 9 9 9 9 9 9 ...
 $ sex                : logi  NA NA NA NA NA NA ...
 $ subdiv             : num  25 25 25 25 25 25 25 25 25 25 ...
 $ rect               : num  3959 3959 3959 3959 3959 ...
 $ ices               : chr  "39G4" "39G4" "39G4" "39G4" ...
 $ lat                : num  55289 55289 55289 55289 55289 ...
 $ long               : num  143628 143628 143628 143628 143628 ...
 $ bottom_depth       : num  603 603 603 603 603 603 603 603 603 603 ...
 $ duration           : num  30 30 30 30 30 30 30 30 30 30 ...
 $ dist               : num  1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 ...
 $ doorspread         : num  875 875 875 875 875 875 875 875 875 875 ...
 $ opening            : num  56 56 56 56 56 56 56 56 56 56 ...
 $ headropedepth      : logi  NA NA NA NA NA NA ...
 $ swept_area         : num  0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 ...
 $ kg_hour            : num  1606 1606 1606 1606 1606 ...
 $ tot_no_hour        : num  57467 57467 57467 57467 57467 ...
 $ lengthcl           : num  200 210 220 230 240 250 260 270 280 290 ...
 $ no_hour            : num  552 368 736 184 1472 ...
 $ length_measure_type: chr  "Total length TL" "Total length TL" "Total length TL" "Total length TL" ...
 $ dna_sample_id      : logi  NA NA NA NA NA NA ...
 $ number_to_sample   : logi  NA NA NA NA NA NA ...
 $ smhi_serial_no     : num  1 1 1 1 1 1 1 1 1 1 ...
sort(unique(trawl_data$lat)) %>% head(20)
 [1] 5541 5542 5608 5613 5619 5629 5632 5634 5703 5706 5712 5717 5721 5722 5723
[16] 5728 5729 5743 5746 5750
# For these coordinates, we can use the function Fede provided:
format.position <- function(x){
  sign.x <- sign(x)
  x <- abs(x)
  x <- ifelse(nchar(x)==3, paste("0",x,sep=""), x)
  x <- ifelse(nchar(x)==2, paste("00",x,sep=""), x)
  x <- ifelse(nchar(x)==1, paste("000",x,sep=""), x)
  dec.x <- as.numeric(paste(substring(x,1,2)))+as.numeric(paste(substring(x,3,4)))/60
  dec.x <- sign.x*dec.x
}

# Apply function
trawl_data$lat <- format.position(trawl_data$lat)
trawl_data$lon <- format.position(trawl_data$long)

trawl_data <- sdmTMB::add_utm_columns(trawl_data, ll_names = c("lon", "lat"))
Warning: Multiple UTM zones detected.
Proceeding with the most common value.
You may wish to choose a different projection.
Proceeding with UTM zone 33N; CRS = 32633.
Visit https://epsg.io/32633 to verify.
# SMHI serial no?
t <- trawl_data %>% drop_na(smhi_serial_no)
drop_na: removed 2,390 rows (14%), 14,115 rows remaining
plot_map + 
  geom_point(data = trawl_data, aes(X*1000, Y*1000, color = factor(year)),
             alpha = 0.5, size = 0.3) +
  theme_sleek(base_size = 8)
Warning: `guide_colourbar()` needs continuous scales.
Warning: Removed 75 rows containing missing values or values outside the scale range
(`geom_point()`).

Read and join the biologial data

trawl_surveys_s_cod <- read_delim(paste0(home, "/data/stomach-data/BITS/biological/Trawl Surveys (S) COD.csv"), delim = ";")
Rows: 10381 Columns: 33
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr   (9): Appr. status, Sampling type, Validity, Station name, ICES, Specie...
dbl  (22): Year, Quarter, Month, Trip No, Seqno, Haul, Subsam, Processing, S...
lgl   (1): Maturity No (SMSF)
date  (1): Date  

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trawl_surveys_s_flounder <- read_delim(paste0(home, "/data/stomach-data/BITS/biological/Trawl Surveys (S) FLE.csv"), delim = ";")
Rows: 13946 Columns: 33
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr   (9): Appr. status, Sampling type, Validity, Station name, ICES, Specie...
dbl  (20): Year, Quarter, Month, Trip No, Seqno, Haul, Subsam, Processing, S...
lgl   (3): Age quality, Age grading, Maturity No (SMSF)
date  (1): Date  

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trawl_surveys_s_cod$Species <- "Cod"
trawl_surveys_s_flounder$Species <- "Flounder"

bio_data <- bind_rows(trawl_surveys_s_cod, trawl_surveys_s_flounder) %>%
  clean_names()

# Join the trawl, bio and stomach data. First create a unique ID.
# In earlier versions I had a column called otolith number (fish ID!), which was really fish id, but it isn't here anymore.

# Add a new species code in the trawl data that matches the stomach data 
trawl_data <- trawl_data %>% mutate(predator_code = ifelse(species == "cod", "COD", "FLE"))

bio_data <- bio_data %>% mutate(predator_code = ifelse(species == "Cod", "COD", "FLE"))

unique(is.na(trawl_data[, c("year", "quarter", "haul")]))
      year quarter  haul
[1,] FALSE   FALSE FALSE
# Go ahead
trawl_data$haul_id <- paste(trawl_data$year,
                            trawl_data$quarter,
                            trawl_data$haul,
                            sep = "_")

# Should be a unique ID per length and predator code
trawl_data %>% 
  group_by(haul_id, lengthcl, predator_code) %>% 
  mutate(n = n()) %>% 
  ungroup() %>% 
  distinct(n)
ungroup: no grouping variables
distinct: removed 16,504 rows (>99%), one row remaining
# A tibble: 1 × 1
      n
  <int>
1     1
# Now we want the cpue of a species-size combination as a column, then make it distinct per haul
trawl_data_unique_haul <- trawl_data %>% 
  dplyr::select(-species, -lengthcl, -predator_code, -kg_hour, -tot_no_hour, -no_hour, -length_measure_type, -sex, -long) %>% # Remove any column that has anything to do with catch, because that info should come from the species dataframes below. I.e., after left_joining this and the catch data, any 0 zero catches should be NA in the joined data
  distinct(haul_id, .keep_all = TRUE)
distinct: removed 15,851 rows (96%), 654 rows remaining
trawl_data_cod <- trawl_data %>% 
  filter(!validity == "I") %>% 
  filter(predator_code == "COD") %>% 
  mutate(kg = kg_hour * (duration / 60),
         kg_km2 = kg / swept_area) %>% # make it biomass density
  drop_na(kg_km2) %>% 
  mutate(size_group = ifelse(lengthcl >= 100 & lengthcl < 250, "small", NA),
         size_group = ifelse(lengthcl >= 250 & lengthcl < 500, "medium", size_group)) %>% 
  group_by(haul_id, size_group) %>% 
  summarise(kg_km2 = sum(kg_km2)) %>% 
  filter(size_group %in% c("small", "medium")) %>% 
  pivot_wider(names_from = size_group, values_from = kg_km2) %>% 
  rename(mcod_kg_km2 = medium,
         scod_kg_km2 = small) %>% 
  mutate(mcod_kg_km2 = replace_na(mcod_kg_km2, 0),
         scod_kg_km2 = replace_na(scod_kg_km2, 0))
filter: removed 32 rows (<1%), 16,473 rows remaining
filter: removed 7,290 rows (44%), 9,183 rows remaining
drop_na: removed 169 rows (2%), 9,014 rows remaining
summarise: now 1,130 rows and 3 columns, one group variable remaining (haul_id)
filter (grouped): removed 253 rows (22%), 877 rows remaining
pivot_wider: reorganized (size_group, kg_km2) into (medium, small) [was 877x3, now 468x3]
rename: renamed 2 variables (mcod_kg_km2, scod_kg_km2)
# Ok, so because this is catches (no hauls with no fish... the only reason I have 0 catches is if one of the size-groups doesn't have a catch!)

# Check with a haul_id
trawl_data %>% 
  filter(predator_code == "COD" & haul_id == "2015_1_20") 
filter: removed 16,500 rows (>99%), 5 rows remaining
  species       date year quarter month        index appr_status seqno haul
1     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
2     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
3     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
4     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
5     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
  validity     station_name subsam processing size sex subdiv rect ices
1        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
2        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
3        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
4        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
5        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
       lat   long bottom_depth duration dist doorspread opening headropedepth
1 56.38333 182938          377       30  1.5        849       6            NA
2 56.38333 182938          377       30  1.5        849       6            NA
3 56.38333 182938          377       30  1.5        849       6            NA
4 56.38333 182938          377       30  1.5        849       6            NA
5 56.38333 182938          377       30  1.5        849       6            NA
  swept_area kg_hour tot_no_hour lengthcl no_hour length_measure_type
1      0.472   4.508          10      260       2     Total length TL
2      0.472   4.508          10      330       2     Total length TL
3      0.472   4.508          10      350       2     Total length TL
4      0.472   4.508          10      400       2     Total length TL
5      0.472   4.508          10      410       2     Total length TL
  dna_sample_id number_to_sample smhi_serial_no      lon        X        Y
1            NA               NA             19 18.48333 715.0414 6254.191
2            NA               NA             19 18.48333 715.0414 6254.191
3            NA               NA             19 18.48333 715.0414 6254.191
4            NA               NA             19 18.48333 715.0414 6254.191
5            NA               NA             19 18.48333 715.0414 6254.191
  predator_code   haul_id
1           COD 2015_1_20
2           COD 2015_1_20
3           COD 2015_1_20
4           COD 2015_1_20
5           COD 2015_1_20
trawl_data_cod
# A tibble: 468 × 3
# Groups:   haul_id [468]
   haul_id   mcod_kg_km2 scod_kg_km2
   <chr>           <dbl>       <dbl>
 1 2015_1_10   19145.         6892. 
 2 2015_1_12    7867.         2503. 
 3 2015_1_14   12351.         6444. 
 4 2015_1_15    3899.         2481. 
 5 2015_1_17    3583.         1792. 
 6 2015_1_2    38232.         8311. 
 7 2015_1_20      23.9           0  
 8 2015_1_21       0.401         0  
 9 2015_1_22     203.           18.4
10 2015_1_25      30.3          10.1
# ℹ 458 more rows
# Now do the same for flounder and join with the cod data, then with haul data!
# Different code because we do not need to worry about size
trawl_data_fle <- trawl_data %>% 
  filter(!validity == "I") %>% 
  filter(predator_code == "FLE") %>% 
  mutate(kg = kg_hour * (duration / 60),
         kg_km2 = kg / swept_area) %>% # make it biomass density
  #drop_na(kg_km2) %>% 
  mutate(size_group = ifelse(lengthcl >= 100, "benthic", "all")) %>% 
  group_by(haul_id, size_group) %>% 
  summarise(fle_kg_km2 = sum(kg_km2)) %>% 
  filter(size_group == "benthic")
filter: removed 32 rows (<1%), 16,473 rows remaining
filter: removed 9,183 rows (56%), 7,290 rows remaining
summarise: now 642 rows and 3 columns, one group variable remaining (haul_id)
filter (grouped): removed 152 rows (24%), 490 rows remaining
# Add back summaries to dataframe of unique hauls
trawl_data_unique_haul <- trawl_data_unique_haul %>% filter(!validity == "I")
filter: removed 16 rows (2%), 638 rows remaining
trawl_data_wide <- left_join(trawl_data_unique_haul, trawl_data_cod, by = "haul_id")
left_join: added 2 columns (mcod_kg_km2, scod_kg_km2)
           > rows only in x   170
           > rows only in y  (  0)
           > matched rows     468
           >                 =====
           > rows total       638
trawl_data_wide <- left_join(trawl_data_wide, trawl_data_fle, by = "haul_id")
left_join: added 2 columns (size_group, fle_kg_km2)
           > rows only in x   148
           > rows only in y  (  0)
           > matched rows     490
           >                 =====
           > rows total       638
# Change the NA's to 0's... 
trawl_data_wide <- trawl_data_wide %>% 
  mutate(scod_kg_km2 = replace_na(scod_kg_km2, 0),
         mcod_kg_km2 = replace_na(mcod_kg_km2, 0),
         fle_kg_km2 = replace_na(fle_kg_km2, 0))

# Now add in the same ID in the bio_data
unique(is.na(bio_data[, c("year", "quarter", "haul")]))
      year quarter  haul
[1,] FALSE   FALSE FALSE
# Go ahead
bio_data$haul_id <- paste(bio_data$year,
                          bio_data$quarter,
                          bio_data$haul,
                          sep = "_")
                                
unique(bio_data$haul_id) %>% head(20)
 [1] "2015_1_94"  "2022_4_249" "2022_4_278" "2021_1_85"  "2022_1_53" 
 [6] "2018_4_9"   "2016_4_43"  "2017_1_73"  "2017_4_2"   "2017_4_4"  
[11] "2017_4_6"   "2019_4_90"  "2022_1_82"  "2016_1_55"  "2022_1_99" 
[16] "2022_4_246" "2022_4_254" "2022_4_279" "2021_1_80"  "2022_1_57" 
# Now join in trawl data into the bio data (and then into stomach data)
colnames(bio_data)
 [1] "date"                "year"                "quarter"            
 [4] "month"               "trip_no"             "appr_status"        
 [7] "sampling_type"       "seqno"               "haul"               
[10] "validity"            "station_name"        "subsam"             
[13] "processing"          "size"                "max_maturity"       
[16] "subdiv"              "rect"                "ices"               
[19] "species"             "lengthcl"            "fishno"             
[22] "age"                 "age_quality"         "age_grading"        
[25] "weight"              "sex"                 "maturity"           
[28] "maturity_no_smsf"    "stomach_sample_code" "specimen_note"      
[31] "individual_no"       "spec_tsn"            "day_night"          
[34] "predator_code"       "haul_id"            
colnames(trawl_data_wide)
 [1] "date"             "year"             "quarter"          "month"           
 [5] "index"            "appr_status"      "seqno"            "haul"            
 [9] "validity"         "station_name"     "subsam"           "processing"      
[13] "size"             "subdiv"           "rect"             "ices"            
[17] "lat"              "bottom_depth"     "duration"         "dist"            
[21] "doorspread"       "opening"          "headropedepth"    "swept_area"      
[25] "dna_sample_id"    "number_to_sample" "smhi_serial_no"   "lon"             
[29] "X"                "Y"                "haul_id"          "mcod_kg_km2"     
[33] "scod_kg_km2"      "size_group"       "fle_kg_km2"      
# Check first for overlapping columns, and if so, if one of the datasets has any NAs
common_cols <- intersect(colnames(bio_data), colnames(trawl_data_wide))

unique(is.na(trawl_data_wide[, common_cols]))
      date  year quarter month appr_status seqno  haul validity station_name
[1,] FALSE FALSE   FALSE FALSE       FALSE FALSE FALSE    FALSE        FALSE
[2,] FALSE FALSE   FALSE FALSE       FALSE FALSE FALSE    FALSE        FALSE
     subsam processing  size subdiv  rect  ices haul_id
[1,]  FALSE      FALSE FALSE  FALSE FALSE FALSE   FALSE
[2,]   TRUE       TRUE  TRUE  FALSE FALSE FALSE   FALSE
unique(is.na(bio_data[, common_cols]))
      date  year quarter month appr_status seqno  haul validity station_name
[1,] FALSE FALSE   FALSE FALSE       FALSE FALSE FALSE    FALSE        FALSE
     subsam processing  size subdiv  rect  ices haul_id
[1,]  FALSE      FALSE FALSE  FALSE FALSE FALSE   FALSE
# Trawl data has some NA's in the common columns. Select only the important columns
colnames(trawl_data_wide)[!colnames(trawl_data_wide) %in% colnames(bio_data)]
 [1] "index"            "lat"              "bottom_depth"     "duration"        
 [5] "dist"             "doorspread"       "opening"          "headropedepth"   
 [9] "swept_area"       "dna_sample_id"    "number_to_sample" "smhi_serial_no"  
[13] "lon"              "X"                "Y"                "mcod_kg_km2"     
[17] "scod_kg_km2"      "size_group"       "fle_kg_km2"      
trawl_data_wide <- trawl_data_wide %>% dplyr::select(haul_id, fle_kg_km2, mcod_kg_km2, scod_kg_km2, lat, lon, bottom_depth, X, Y, smhi_serial_no)

bio_data <- left_join(bio_data, trawl_data_wide, by = "haul_id")
left_join: added 9 columns (fle_kg_km2, mcod_kg_km2, scod_kg_km2, lat, lon, …)
           > rows only in x        0
           > rows only in y  (   146)
           > matched rows     24,327
           >                 ========
           > rows total       24,327
names(bio_data)
 [1] "date"                "year"                "quarter"            
 [4] "month"               "trip_no"             "appr_status"        
 [7] "sampling_type"       "seqno"               "haul"               
[10] "validity"            "station_name"        "subsam"             
[13] "processing"          "size"                "max_maturity"       
[16] "subdiv"              "rect"                "ices"               
[19] "species"             "lengthcl"            "fishno"             
[22] "age"                 "age_quality"         "age_grading"        
[25] "weight"              "sex"                 "maturity"           
[28] "maturity_no_smsf"    "stomach_sample_code" "specimen_note"      
[31] "individual_no"       "spec_tsn"            "day_night"          
[34] "predator_code"       "haul_id"             "fle_kg_km2"         
[37] "mcod_kg_km2"         "scod_kg_km2"         "lat"                
[40] "lon"                 "bottom_depth"        "X"                  
[43] "Y"                   "smhi_serial_no"     

Summarise haul data on the ICES rectangle level to join into the diet

# Now calculate spatial overlap in biomass density... 
year_rect_sum_biom <- bio_data %>% 
  dplyr::select(fle_kg_km2, mcod_kg_km2, scod_kg_km2, ices, year) %>% 
  rename(ices_rect = ices) %>% #,
  group_by(year, ices_rect) %>%
  summarise(mean_fle = mean(fle_kg_km2),
            mean_scod = mean(scod_kg_km2),
            mean_mcod = mean(mcod_kg_km2)) 
rename: renamed one variable (ices_rect)
summarise: now 117 rows and 5 columns, one group variable remaining (year)
# Join into diet data
ovr <- ovr %>% 
  pivot_wider(names_from = overlap_group, values_from = overlap) %>% 
  drop_na() %>% 
  left_join(year_rect_sum_biom)
pivot_wider: reorganized (overlap_group, overlap) into (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) [was 206x8, now 82x9]
drop_na (grouped): removed 20 rows (24%), 62 rows remaining
Joining with `by = join_by(year, ices_rect)`
left_join: added 3 columns (mean_fle, mean_scod, mean_mcod)
> rows only in x 0
> rows only in y (55)
> matched rows 62
> ====
> rows total 62

Fit zero inflated beta regressions using brms

#https://www.andrewheiss.com/blog/2021/11/08/beta-regression-guide/#zero-inflated-beta-regression-bayesian-style
  
# full model
ovr <- ovr %>%
  ungroup() %>% 
  clean_names() %>% 
  pivot_longer(c("flounder_small_cod", "flounder_large_cod", "small_cod_large_cod")) %>% 
  mutate(value = ifelse(value < 1e-15, 0, value),
         mean_biom_sc = as.numeric(scale(mean_fle + mean_scod + mean_mcod) / 3)#,
         )
ungroup: no grouping variables
pivot_longer: reorganized (flounder_small_cod, flounder_large_cod, small_cod_large_cod) into (name, value) [was 62x12, now 186x11]
zib_model <- bf(
  value ~ 0 + name*mean_biom_sc,
  phi ~ 0 + name,
  zi ~ 0 + name,
  family = zero_inflated_beta()
)

fit <- brm(
  formula = zib_model,
  data = ovr,
  cores = 4,
  chains = 4,
  iter = 4000
)
Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 15.0.0 (clang-1500.1.0.2.5)’
using SDK: ‘MacOSX14.2.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
         ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling
fit
 Family: zero_inflated_beta 
  Links: mu = logit; phi = log; zi = logit 
Formula: value ~ 0 + name * mean_biom_sc 
         phi ~ 0 + name
         zi ~ 0 + name
   Data: ovr (Number of observations: 186) 
  Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
         total post-warmup draws = 8000

Population-Level Effects: 
                                     Estimate Est.Error l-95% CI u-95% CI Rhat
nameflounder_large_cod                  -2.53      0.15    -2.82    -2.21 1.00
nameflounder_small_cod                  -1.68      0.13    -1.94    -1.41 1.00
namesmall_cod_large_cod                 -0.82      0.15    -1.11    -0.52 1.00
mean_biom_sc                            -0.28      0.38    -1.09     0.38 1.00
nameflounder_small_cod:mean_biom_sc      0.88      0.50    -0.07     1.87 1.00
namesmall_cod_large_cod:mean_biom_sc    -0.37      0.63    -1.63     0.86 1.00
phi_nameflounder_large_cod               2.24      0.21     1.81     2.62 1.00
phi_nameflounder_small_cod               1.82      0.19     1.43     2.16 1.00
phi_namesmall_cod_large_cod              0.80      0.17     0.47     1.12 1.00
zi_nameflounder_large_cod               -4.68      1.28    -7.81    -2.77 1.00
zi_nameflounder_small_cod               -4.69      1.30    -7.78    -2.79 1.00
zi_namesmall_cod_large_cod              -4.68      1.33    -7.82    -2.76 1.00
                                     Bulk_ESS Tail_ESS
nameflounder_large_cod                   5741     5093
nameflounder_small_cod                   7314     5365
namesmall_cod_large_cod                  7696     6077
mean_biom_sc                             4840     4559
nameflounder_small_cod:mean_biom_sc      5135     4777
namesmall_cod_large_cod:mean_biom_sc     5643     4857
phi_nameflounder_large_cod               5312     5321
phi_nameflounder_small_cod               6836     5789
phi_namesmall_cod_large_cod              7397     5597
zi_nameflounder_large_cod                6500     3389
zi_nameflounder_small_cod                5165     2753
zi_namesmall_cod_large_cod               6506     3694

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
zi_intercept <- tidy(fit, effects = "fixed") %>% 
  filter(component == "zi") %>% 
  pull(estimate) 
Warning in tidy.brmsfit(fit, effects = "fixed"): some parameter names contain
underscores: term naming may be unreliable!
filter: removed 9 rows (75%), 3 rows remaining
# Transformed to a probability/proportion
plogis(zi_intercept)*100 # percent
 b_zi_nameflounder_large_cod  b_zi_nameflounder_small_cod 
                   0.9154595                    0.9084557 
b_zi_namesmall_cod_large_cod 
                   0.9207962 
# Compare with data
ovr %>% 
  count(value == 0) %>% 
  mutate(prop = n / sum(n) *100)
count: now 2 rows and 2 columns, ungrouped
# A tibble: 2 × 3
  `value == 0`     n  prop
  <lgl>        <int> <dbl>
1 FALSE          183 98.4 
2 TRUE             3  1.61
get_variables(fit)
 [1] "b_nameflounder_large_cod"              
 [2] "b_nameflounder_small_cod"              
 [3] "b_namesmall_cod_large_cod"             
 [4] "b_mean_biom_sc"                        
 [5] "b_nameflounder_small_cod:mean_biom_sc" 
 [6] "b_namesmall_cod_large_cod:mean_biom_sc"
 [7] "b_phi_nameflounder_large_cod"          
 [8] "b_phi_nameflounder_small_cod"          
 [9] "b_phi_namesmall_cod_large_cod"         
[10] "b_zi_nameflounder_large_cod"           
[11] "b_zi_nameflounder_small_cod"           
[12] "b_zi_namesmall_cod_large_cod"          
[13] "lprior"                                
[14] "lp__"                                  
[15] "accept_stat__"                         
[16] "stepsize__"                            
[17] "treedepth__"                           
[18] "n_leapfrog__"                          
[19] "divergent__"                           
[20] "energy__"                              
plot(fit)

# https://discourse.mc-stan.org/t/trying-to-understand-default-prior-for-the-hurdle-part/24337
priors <- prior_summary(fit)
priors
  prior class                                 coef group resp dpar nlpar lb ub
 (flat)     b                                                                 
 (flat)     b                         mean_biom_sc                            
 (flat)     b               nameflounder_large_cod                            
 (flat)     b               nameflounder_small_cod                            
 (flat)     b  nameflounder_small_cod:mean_biom_sc                            
 (flat)     b              namesmall_cod_large_cod                            
 (flat)     b namesmall_cod_large_cod:mean_biom_sc                            
 (flat)     b                                                  phi            
 (flat)     b               nameflounder_large_cod             phi            
 (flat)     b               nameflounder_small_cod             phi            
 (flat)     b              namesmall_cod_large_cod             phi            
 (flat)     b                                                   zi            
 (flat)     b               nameflounder_large_cod              zi            
 (flat)     b               nameflounder_small_cod              zi            
 (flat)     b              namesmall_cod_large_cod              zi            
       source
      default
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
 (vectorized)
      default
 (vectorized)
 (vectorized)
 (vectorized)
      default
 (vectorized)
 (vectorized)
 (vectorized)
stancode(fit)
// generated with brms 2.20.4
functions {
  /* zero-inflated beta log-PDF of a single response
   * Args:
   *   y: the response value
   *   mu: mean parameter of the beta distribution
   *   phi: precision parameter of the beta distribution
   *   zi: zero-inflation probability
   * Returns:
   *   a scalar to be added to the log posterior
   */
   real zero_inflated_beta_lpdf(real y, real mu, real phi, real zi) {
     row_vector[2] shape = [mu * phi, (1 - mu) * phi];
     if (y == 0) {
       return bernoulli_lpmf(1 | zi);
     } else {
       return bernoulli_lpmf(0 | zi) +
              beta_lpdf(y | shape[1], shape[2]);
     }
   }
  /* zero-inflated beta log-PDF of a single response
   * logit parameterization of the zero-inflation part
   * Args:
   *   y: the response value
   *   mu: mean parameter of the beta distribution
   *   phi: precision parameter of the beta distribution
   *   zi: linear predictor for zero-inflation part
   * Returns:
   *   a scalar to be added to the log posterior
   */
   real zero_inflated_beta_logit_lpdf(real y, real mu, real phi, real zi) {
     row_vector[2] shape = [mu * phi, (1 - mu) * phi];
     if (y == 0) {
       return bernoulli_logit_lpmf(1 | zi);
     } else {
       return bernoulli_logit_lpmf(0 | zi) +
              beta_lpdf(y | shape[1], shape[2]);
     }
   }
  // zero-inflated beta log-CCDF and log-CDF functions
  real zero_inflated_beta_lccdf(real y, real mu, real phi, real zi) {
    row_vector[2] shape = [mu * phi, (1 - mu) * phi];
    return bernoulli_lpmf(0 | zi) + beta_lccdf(y | shape[1], shape[2]);
  }
  real zero_inflated_beta_lcdf(real y, real mu, real phi, real zi) {
    return log1m_exp(zero_inflated_beta_lccdf(y | mu, phi, zi));
  }
}
data {
  int<lower=1> N;  // total number of observations
  vector[N] Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  int<lower=1> K_phi;  // number of population-level effects
  matrix[N, K_phi] X_phi;  // population-level design matrix
  int<lower=1> K_zi;  // number of population-level effects
  matrix[N, K_zi] X_zi;  // population-level design matrix
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
}
parameters {
  vector[K] b;  // regression coefficients
  vector[K_phi] b_phi;  // regression coefficients
  vector[K_zi] b_zi;  // regression coefficients
}
transformed parameters {
  real lprior = 0;  // prior contributions to the log posterior
}
model {
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N] mu = rep_vector(0.0, N);
    // initialize linear predictor term
    vector[N] phi = rep_vector(0.0, N);
    // initialize linear predictor term
    vector[N] zi = rep_vector(0.0, N);
    mu += X * b;
    phi += X_phi * b_phi;
    zi += X_zi * b_zi;
    mu = inv_logit(mu);
    phi = exp(phi);
    for (n in 1:N) {
      target += zero_inflated_beta_logit_lpdf(Y[n] | mu[n], phi[n], zi[n]);
    }
  }
  // priors including constants
  target += lprior;
}
generated quantities {
}
pp_check(fit) +
  coord_cartesian(expand = TRUE) +
  labs(x = "Schoener's overlap index",
       y = "Density") + 
  theme(legend.position.inside = c(0.8, 0.8)) + 
  guides(color = guide_legend(position = "inside"))
Using 10 posterior draws for ppc type 'dens_overlay' by default.
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.

ggsave(paste0(home, "/figures/supp/schoener_zib_pp_check.pdf"), width = 11, height = 11, units = "cm")

conditional_effects(fit)

set.seed(123)

ovr <- ovr %>% 
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2))

p1 <- ovr %>%
  data_grid(name = unique(name),
            mean_biom_sc = 0) %>%
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) %>% 
  add_epred_draws(fit) %>%
  ggplot(aes(.epred, name2)) +
  stat_pointinterval(.width = c(.66, .95)) + 
  geom_vline(xintercept = 0.6, linetype = 2, alpha = 0.6) +
  geom_jitter(data = ovr, aes(value, name2), height = 0.2, width = 0, alpha = 0.3, color = "grey50") +
  labs(y = "Schoener's overlap index", x = "") +
  geom_stripped_rows() +
  coord_cartesian(expand = 0, xlim = c(-0.03, 1)) +
  theme(plot.margin = unit(c(0, 0.3, 0, 0), "cm")) +
  NULL

pal <- brewer.pal(name = "RdBu", n = 7)

# Check CI's
fit %>%
  spread_draws(b_mean_biom_sc,
               `b_nameflounder_small_cod:mean_biom_sc`,
               `b_namesmall_cod_large_cod:mean_biom_sc`) %>%
  mutate("Flounder\nLarge cod" = b_mean_biom_sc,
         "Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
         "Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
         ) %>% 
  summarise(mean = mean(`Small cod\nLarge cod`),
            upr = quantile(`Small cod\nLarge cod`, probs = 0.845))
summarise: now one row and 2 columns, ungrouped
# A tibble: 1 × 2
    mean    upr
   <dbl>  <dbl>
1 -0.653 -0.163
p2 <- fit %>%
  spread_draws(b_mean_biom_sc,
               `b_nameflounder_small_cod:mean_biom_sc`,
               `b_namesmall_cod_large_cod:mean_biom_sc`) %>%
  mutate("Flounder\nLarge cod" = b_mean_biom_sc,
         "Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
         "Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
         ) %>%
  pivot_longer(c(`Flounder\nLarge cod`, `Flounder\nSmall cod`, `Small cod\nLarge cod`),
               names_to = "Overlap group", values_to = "slope") %>% 
  ggplot(aes(slope, `Overlap group`, fill = after_stat(x < 0))) +
  ggdist::stat_eye() +
  labs(x = "", y = "Slope of biomass density\non the mean Schoener's diet overlap") +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
  scale_fill_manual(values = c("gray80", alpha(pal[6], 0.8))) + 
  theme(legend.position.inside = c(0.15, 0.1)) + 
  guides(fill = guide_legend(position = "inside")) +
  geom_stripped_rows() +
  coord_cartesian(expand = 0) +
  NULL
pivot_longer: reorganized (Flounder
Large cod, Flounder
Small cod, Small cod
Large cod) into (Overlap group, slope) [was 8000x9, now 24000x8]
p2

# Conditional
p3 <- ovr %>%
  data_grid(name = unique(name),
            mean_biom_sc = seq_range(mean_biom_sc, n = 101)) %>%
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) %>% 
  add_epred_draws(fit) %>%
  ggplot(aes(x = mean_biom_sc, y = value, fill = name2, color = name2)) +
  stat_lineribbon(aes(y = .epred), .width = c(.95), alpha = 1/4) +
  geom_point(data = ovr, alpha = 0.5) +
  scale_fill_brewer(palette = "Pastel1") +
  scale_color_brewer(palette = "Set1") + 
  theme(legend.position.inside = c(0.90, 0.84)) + 
  labs(y = "Schoener's overlap index", x = "Scaled cod and flounder mean biomass density",
       fill = "", color = "") + 
  guides(fill = guide_legend(position = "inside"))

pp <- (p1 + p2) + plot_layout(axes = "collect")

pp / p3 +
  plot_annotation(tag_levels = "A")

ggsave(paste0(home, "/figures/schoener_zib.pdf"), width = 17, height = 20, units = "cm")

# What's the actual predictions
sum <- ovr %>%
  data_grid(name = unique(name),
            mean_biom_sc = 0) %>%
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) %>% 
  add_epred_draws(fit) %>% 
  group_by(name) %>% 
  summarise(mean = mean(.epred))
summarise: now 3 rows and 2 columns, ungrouped
sum
# A tibble: 3 × 2
  name                  mean
  <chr>                <dbl>
1 flounder_large_cod  0.0735
2 flounder_small_cod  0.156 
3 small_cod_large_cod 0.302 
p1 + geom_vline(data = sum, aes(xintercept = mean))

knitr::knit_exit()